Peanut soil microbiome : Fungi

soil collected from: Wiregrass Research and Extension Center

data collected: 2021

Load packages and dependencies

library(phyloseq)
library(decontam)
library(reltools)
library(minpack.lm)
library(devtools)
## Loading required package: usethis
library(tyRa)
## 
## Attaching package: 'tyRa'
## The following objects are masked from 'package:reltools':
## 
##     fit_sncm, plot_sncm_fit
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:phyloseq':
## 
##     distance
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## 
## Attaching package: 'Biostrings'
## The following objects are masked from 'package:Hmisc':
## 
##     mask, translate
## The following object is masked from 'package:base':
## 
##     strsplit
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following object is masked from 'package:XVector':
## 
##     slice
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.6     ✔ purrr   1.0.1
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.3     ✔ forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()        masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine()         masks BiocGenerics::combine()
## ✖ purrr::compact()         masks XVector::compact()
## ✖ dplyr::desc()            masks IRanges::desc()
## ✖ tidyr::expand()          masks S4Vectors::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks S4Vectors::first()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ purrr::reduce()          masks IRanges::reduce()
## ✖ dplyr::rename()          masks S4Vectors::rename()
## ✖ dplyr::slice()           masks XVector::slice(), IRanges::slice()
## ✖ dplyr::src()             masks Hmisc::src()
## ✖ dplyr::summarize()       masks Hmisc::summarize()
library(vegan)
## Loading required package: permute
## 
## Attaching package: 'permute'
## 
## The following object is masked from 'package:devtools':
## 
##     check
## 
## This is vegan 2.5-7
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.1.2
library(readxl)
library(tibble)
library(ggtext)
## Warning: package 'ggtext' was built under R version 4.1.2
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.1.2
library(microbiome)
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2020 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## 
## Attaching package: 'microbiome'
## 
## The following object is masked from 'package:vegan':
## 
##     diversity
## 
## The following object is masked from 'package:Biostrings':
## 
##     coverage
## 
## The following objects are masked from 'package:IRanges':
## 
##     coverage, transform
## 
## The following object is masked from 'package:S4Vectors':
## 
##     transform
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## 
## The following object is masked from 'package:base':
## 
##     transform

Color palette

cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
fungi.colors <- c("#c6dbef","#9ecae1","#6baed6","#3182bd","#08519c",
                           "#c7e9c0", "#a1d99b", "#74c476", "#41ab5d", "#238b45", "#005a32",
                           "#fdd0a2", "#fdae6b", "#fd8d3c", "#f16913", "#d94801", "#8c2d04",
                           "#dadaeb", "#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#4a1486",
                           "#fcbba1", "#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#99000d",
                           "#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252", "#252525")

Loading data: metadata, OTU table, taxonomy

#metadata
samp_dat_fungi <- read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/metadata_libr.prepall02.01.22.csv", na.strings = "NA")

rownames(samp_dat_fungi) <- samp_dat_fungi$Sample #row names must match OTU table headers to be able to be readed
SAMP.fungi <- phyloseq::sample_data(samp_dat_fungi)

# OTU table 
otu_fungi <- read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/otu_table_ITS_UNOISE_R1.csv")
rownames(otu_fungi) <- otu_fungi$OTU
otu_fungi <- otu_fungi[,-1]
OTU.fungi <- phyloseq::otu_table(otu_fungi, taxa_are_rows = TRUE)

colnames(otu_fungi)
##   [1] "SoilAT2R1W0" "SoilAT3R1W0" "SoilBT5R1W0" "SoilAT5R1W0" "SoilAT4R1W0"
##   [6] "SoilBT3R1W0" "SoilBT1R1W0" "SoilBT4R1W0" "SoilAT1R1W0" "SoilBT2R1W0"
##  [11] "SoilAT4R2W0" "SoilAT5R2W0" "SoilBT2R2W0" "SoilBT1R2W0" "SoilAT1R2W0"
##  [16] "SoilBT3R2W0" "SoilAT3R2W0" "SoilAT2R2W0" "SoilBT5R2W0" "SoilBT4R2W0"
##  [21] "SoilBT2R3W0" "SoilAT3R3W0" "SoilAT5R3W0" "SoilBT5R3W0" "SoilBT1R3W0"
##  [26] "SoilAT4R3W0" "SoilAT1R3W0" "SoilBT3R3W0" "SoilBT4R3W0" "SoilAT2R3W0"
##  [31] "SoilBT5R4W0" "SoilAT1R4W0" "SoilAT2R4W0" "SoilAT5R4W0" "SoilBT3R4W0"
##  [36] "SoilBT4R4W0" "SoilBT1R4W0" "SoilAT3R4W0" "SoilAT4R4W0" "SoilBT2R4W0"
##  [41] "SoilAT1R5W0" "SoilBT1R5W0" "SoilBT4R5W0" "SoilAT5R5W0" "SoilBT2R5W0"
##  [46] "SoilAT4R5W0" "SoilAT2R5W0" "SoilAT3R5W0" "SoilBT3R5W0" "SoilBT5R5W0"
##  [51] "SoilBT4R6W0" "SoilAT3R6W0" "SoilAT1R6W0" "SoilAT4R6W0" "SoilBT3R6W0"
##  [56] "SoilAT2R6W0" "SoilBT5R6W0" "SoilAT5R6W0" "SoilBT1R6W0" "SoilBT2R6W0"
##  [61] "SoilAT2R1W2" "SoilAT3R1W2" "SoilBT5R1W2" "SoilAT5R1W2" "SoilAT4R1W2"
##  [66] "SoilBT3R1W2" "SoilBT1R1W2" "SoilBT4R1W2" "SoilAT1R1W2" "SoilBT2R1W2"
##  [71] "SoilAT4R2W2" "SoilAT5R2W2" "SoilBT2R2W2" "SoilBT1R2W2" "SoilAT1R2W2"
##  [76] "SoilBT3R2W2" "SoilAT3R2W2" "SoilAT2R2W2" "SoilBT5R2W2" "SoilBT4R2W2"
##  [81] "SoilBT2R3W2" "SoilAT3R3W2" "SoilAT5R3W2" "SoilBT5R3W2" "SoilBT1R3W2"
##  [86] "SoilAT4R3W2" "SoilAT1R3W2" "SoilBT3R3W2" "SoilBT4R3W2" "SoilAT2R3W2"
##  [91] "SoilBT5R4W2" "SoilAT1R4W2" "SoilAT2R4W2" "SoilAT5R4W2" "NCP1"       
##  [96] "PC1"         "SoilBT3R4W2" "SoilBT4R4W2" "NEC1"        "SoilBT1R4W2"
## [101] "SoilAT3R4W2" "SoilAT4R4W2" "SoilBT2R4W2" "SoilAT1R5W2" "SoilBT1R5W2"
## [106] "SoilBT4R5W2" "SoilAT5R5W2" "SoilBT2R5W2" "SoilAT4R5W2" "SoilAT2R5W2"
## [111] "SoilAT3R5W2" "SoilBT3R5W2" "SoilBT5R5W2" "SoilBT4R6W2" "SoilAT3R6W2"
## [116] "SoilAT1R6W2" "SoilAT4R6W2" "SoilBT3R6W2" "SoilAT2R6W2" "SoilBT5R6W2"
## [121] "SoilAT5R6W2" "SoilBT1R6W2" "SoilBT2R6W2" "SoilAT2R1W5" "SoilAT3R1W5"
## [126] "SoilBT5R1W5" "SoilAT5R1W5" "SoilAT4R1W5" "SoilBT3R1W5" "SoilBT1R1W5"
## [131] "SoilBT4R1W5" "SoilAT1R1W5" "SoilBT2R1W5" "SoilAT4R2W5" "SoilAT5R2W5"
## [136] "SoilBT2R2W5" "SoilBT1R2W5" "SoilAT1R2W5" "SoilBT3R2W5" "SoilAT3R2W5"
## [141] "SoilAT2R2W5" "SoilBT5R2W5" "SoilBT4R2W5" "SoilBT2R3W5" "SoilAT3R3W5"
## [146] "SoilAT5R3W5" "NEC2"        "SoilBT5R3W5" "SoilBT1R3W5" "SoilAT4R3W5"
## [151] "SoilAT1R3W5" "SoilBT3R3W5" "SoilBT4R3W5" "SoilAT2R3W5" "SoilBT5R4W5"
## [156] "SoilAT1R4W5" "SoilAT2R4W5" "SoilAT5R4W5" "SoilBT3R4W5" "SoilBT4R4W5"
## [161] "SoilBT1R4W5" "SoilAT3R4W5" "SoilAT4R4W5" "SoilBT2R4W5" "SoilAT1R5W5"
## [166] "SoilBT1R5W5" "SoilBT4R5W5" "SoilAT5R5W5" "SoilBT2R5W5" "SoilAT4R5W5"
## [171] "NEC3"        "SoilAT2R5W5" "SoilAT3R5W5" "SoilBT3R5W5" "SoilBT5R5W5"
## [176] "SoilBT4R6W5" "SoilAT3R6W5" "SoilAT1R6W5" "SoilAT4R6W5" "SoilBT3R6W5"
## [181] "SoilAT2R6W5" "SoilBT5R6W5" "SoilAT5R6W5" "SoilBT1R6W5" "SoilBT2R6W5"
## [186] "SoilAT2R1W7" "SoilAT3R1W7" "SoilBT5R1W7" "SoilAT5R1W7" "SoilAT4R1W7"
## [191] "NCP2"        "PC2"         "SoilBT3R1W7" "SoilBT1R1W7" "SoilBT4R1W7"
## [196] "SoilAT1R1W7" "NEC4"        "SoilBT2R1W7" "SoilAT4R2W7" "SoilAT5R2W7"
## [201] "SoilBT2R2W7" "SoilBT1R2W7" "SoilAT1R2W7" "SoilBT3R2W7" "SoilAT3R2W7"
## [206] "SoilAT2R2W7" "SoilBT5R2W7" "SoilBT4R2W7" "SoilBT2R3W7" "SoilAT3R3W7"
## [211] "SoilAT5R3W7" "SoilBT5R3W7" "SoilBT1R3W7" "SoilAT4R3W7" "SoilAT1R3W7"
## [216] "SoilBT3R3W7" "SoilBT4R3W7" "SoilAT2R3W7" "SoilBT5R4W7" "NEC5"       
## [221] "SoilAT1R4W7" "SoilAT2R4W7" "SoilAT5R4W7" "SoilBT3R4W7" "SoilBT4R4W7"
## [226] "SoilBT1R4W7" "SoilAT3R4W7" "SoilAT4R4W7" "SoilBT2R4W7" "SoilAT1R5W7"
## [231] "SoilBT1R5W7" "SoilBT4R5W7" "SoilAT5R5W7" "SoilBT2R5W7" "SoilAT4R5W7"
## [236] "SoilAT2R5W7" "SoilAT3R5W7" "SoilBT3R5W7" "SoilBT5R5W7" "SoilBT4R6W7"
## [241] "SoilAT3R6W7" "SoilAT1R6W7" "SoilAT4R6W7" "SoilBT3R6W7" "NEC6"       
## [246] "SoilAT2R6W7" "SoilBT5R6W7" "SoilAT5R6W7" "SoilBT1R6W7" "SoilBT2R6W7"
## [251] "SoilAT2R1W9" "SoilAT3R1W9" "SoilBT5R1W9" "SoilAT5R1W9" "SoilAT4R1W9"
## [256] "SoilBT3R1W9" "SoilBT1R1W9" "SoilBT4R1W9" "SoilAT1R1W9" "SoilBT2R1W9"
## [261] "SoilAT4R2W9" "SoilAT5R2W9" "SoilBT2R2W9" "SoilBT1R2W9" "SoilAT1R2W9"
## [266] "SoilBT3R2W9" "SoilAT3R2W9" "SoilAT2R2W9" "NEC7"        "SoilBT5R2W9"
## [271] "SoilBT4R2W9" "SoilBT2R3W9" "SoilAT3R3W9" "SoilAT5R3W9" "SoilBT5R3W9"
## [276] "SoilBT1R3W9" "SoilAT4R3W9" "SoilAT1R3W9" "SoilBT3R3W9" "SoilBT4R3W9"
## [281] "SoilAT2R3W9" "SoilBT5R4W9" "SoilAT1R4W9" "SoilAT2R4W9" "SoilAT5R4W9"
## [286] "SoilBT3R4W9" "NCP3"        "PC3"         "SoilBT4R4W9" "SoilBT1R4W9"
## [291] "SoilAT3R4W9" "SoilAT4R4W9" "SoilBT2R4W9" "SoilAT1R5W9" "NEC8"       
## [296] "SoilBT1R5W9" "SoilBT4R5W9" "SoilAT5R5W9" "SoilBT2R5W9" "SoilAT4R5W9"
## [301] "SoilAT2R5W9" "SoilAT3R5W9" "SoilBT3R5W9" "SoilBT5R5W9" "SoilBT4R6W9"
## [306] "SoilAT3R6W9" "SoilAT1R6W9" "SoilAT4R6W9" "SoilBT3R6W9" "SoilAT2R6W9"
## [311] "SoilBT5R6W9" "SoilAT5R6W9" "SoilBT1R6W9" "SoilBT2R6W9" "NEC9"       
## [316] "NCP4"        "NCP5"        "NCP6"        "NCP7"        "NCP8"       
## [321] "NCP9"        "NCP10"       "NCP11"       "PC4"
# Taxonomy
unite_taxonomy <-read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/taxfungi_R1_UNITE.csv",
           header = TRUE,
           row.names = 1)

Discard “unidentified” in the unite_taxonomy from kingdom and select only Kingdom fungi

We also filter the mock community

any(unite_taxonomy$Kingdom == "unidentified")
## [1] TRUE
nrow(unite_taxonomy[unite_taxonomy$Kingdom == "unidentified", ])
## [1] 2772
unite_taxonomy[unite_taxonomy$Kingdom == "unidentified", ]
unite_taxonomy %>% dplyr::filter(unite_taxonomy$Kingdom == "unidentified")
unite_taxonomy <- subset(unite_taxonomy, Kingdom %in% c("Fungi", "Mocki"))

dim(unite_taxonomy)
## [1] 3140    8
# Removing bacteria and other non-target taxa ----------------------------------------------------------------------------
head(unite_taxonomy)
levels(as.factor(unite_taxonomy$Kingdom))
## [1] "Fungi" "Mocki"
levels(as.factor(unite_taxonomy$Class))
##  [1] "Agaricomycetes"                     "Agaricostilbomycetes"              
##  [3] "Archaeorhizomycetes"                "Archaeosporomycetes"               
##  [5] "Atractiellomycetes"                 "Basidiobolomycetes"                
##  [7] "Blastocladiomycetes"                "Calcarisporiellomycetes"           
##  [9] "Chytridiomycetes"                   "Cystobasidiomycetes"               
## [11] "Dacrymycetes"                       "Dothideomycetes"                   
## [13] "Endogonomycetes"                    "Entorrhizomycetes"                 
## [15] "Eurotiomycetes"                     "Exobasidiomycetes"                 
## [17] "Geminibasidiomycetes"               "Glomeromycetes"                    
## [19] "GS13"                               "GS14"                              
## [21] "Kickxellomycetes"                   "Laboulbeniomycetes"                
## [23] "Lecanoromycetes"                    "Leotiomycetes"                     
## [25] "Lobulomycetes"                      "Malasseziomycetes"                 
## [27] "Microbotryomycetes"                 "Mockomycetes"                      
## [29] "Mortierellomycetes"                 "Mucoromycetes"                     
## [31] "Mucoromycotina_cls_Incertae_sedis"  "Olpidiomycetes"                    
## [33] "Orbiliomycetes"                     "Paraglomeromycetes"                
## [35] "Pezizomycetes"                      "Pezizomycotina_cls_Incertae_sedis" 
## [37] "Pucciniomycetes"                    "Rhizophlyctidomycetes"             
## [39] "Rhizophydiomycetes"                 "Rozellomycotina_cls_Incertae_sedis"
## [41] "Saccharomycetes"                    "Sanchytriomycetes"                 
## [43] "Sordariomycetes"                    "Spizellomycetes"                   
## [45] "Synchytriomycetes"                  "Taphrinomycetes"                   
## [47] "Tremellomycetes"                    "Umbelopsidomycetes"                
## [49] "unidentified"                       "Ustilaginomycetes"                 
## [51] "Wallemiomycetes"                    "Zoopagomycetes"
unite_taxonomy$OTU <- rownames(unite_taxonomy)

TAX.fungi.unite <- phyloseq::tax_table(as.matrix(unite_taxonomy))

Load fasta & phyloseq object

FASTA.fungi <- readDNAStringSet("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/otus_R1.fasta", seek.first.rec=TRUE, use.names=TRUE)

physeq_fungi_nonfilt <- phyloseq::phyloseq(OTU.fungi, TAX.fungi.unite, FASTA.fungi, SAMP.fungi)

Decontaminate

#this will decontaminate the data by comparing to those microbes also found in the control control samples
physeq_fungi_nonfilt@sam_data$Sample_or_Control <- ifelse(physeq_fungi_nonfilt@sam_data$Isolate.Code %in% c("NEC", "NCP"), "Control Sample", "True Sample")
sample_data(physeq_fungi_nonfilt)$is.neg <- sample_data(physeq_fungi_nonfilt)$Sample_or_Control == "Control Sample"
contamdf.prev <- isContaminant(physeq_fungi_nonfilt, method="prevalence", neg="is.neg", threshold = 0.1, normalize = TRUE)
badTaxa <- rownames(contamdf.prev[contamdf.prev$contaminant == TRUE,])

print(badTaxa)
## [1] "FOTU_3391" "FOTU_5159" "FOTU_571"
ps.pa <- transform_sample_counts(physeq_fungi_nonfilt, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Sample_or_Control == "Control Sample", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Sample_or_Control == "True Sample", ps.pa)
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
                    contaminant=contamdf.prev$contaminant)
#chart name decontaminate
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
  xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")

goodTaxa <- setdiff(taxa_names(physeq_fungi_nonfilt), badTaxa)
fungi_sub_no_bad <- prune_taxa(goodTaxa, physeq_fungi_nonfilt)

# analyse the positive controls Mock sequences

Sanity check, here we make sure that the OTUs we have are fungi

# Sanity check - we only want OTUs that are Fungi
unique(fungi_sub_no_bad@tax_table@.Data[,1])# We only want Kingdom Fungi
## [1] "Fungi" "Mocki"
fungi.obj1 <- fungi_sub_no_bad %>% 
  subset_taxa(Kingdom == "Fungi") %>%
  subset_samples(!Isolate.Code %in% c("NEC", "NCP", "PC")) %>%
  phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) # remove taxa with zero reads (i.e., those not present in objective 1)

unique(fungi.obj1@tax_table@.Data[,1])# We only want Kingdom Fungi
## [1] "Fungi"
sort(data.frame(sample_sums(fungi.obj1))[,1], decreasing = TRUE)
##   [1] 243553 192718 189876 152572 152465 148369 143627 140231 138660 137176
##  [11] 132323 131912 128118 127059 125352 125047 124503 123537 123308 122402
##  [21] 121638 120076 119148 118824 118168 117892 115718 114135 113493 112136
##  [31] 111406 110409 108759 108409 106851 106630 105862 105345 105098 104294
##  [41] 103694 103245 102640 101878 101714 101433 101414 100978 100530 100296
##  [51]  99760  99422  99186  98894  98743  98339  98023  97782  97108  96608
##  [61]  96257  95957  95582  95254  95112  94630  94416  94098  93520  93149
##  [71]  92750  92258  91803  91602  91203  90828  90755  90415  89902  89804
##  [81]  89541  89239  88787  88772  87635  86988  86451  86240  86220  85996
##  [91]  85895  85804  85656  85644  85557  85539  85504  85332  84959  83820
## [101]  83712  83206  83162  83135  82554  82527  82506  81628  81602  81258
## [111]  81092  80687  80531  80522  80521  80089  80089  80034  79950  79841
## [121]  79479  79347  78920  78830  78270  78203  78076  77898  77775  77379
## [131]  77371  77316  77210  77059  77015  76860  76354  75130  74755  74689
## [141]  74675  74560  74032  73803  73657  72934  72915  72890  72748  72304
## [151]  72263  72139  71952  71847  71650  70939  70676  70603  70382  70381
## [161]  70375  69646  69628  69414  69277  69174  68927  68897  68694  68569
## [171]  68239  67951  67898  67431  66996  66873  66858  66745  66724  66598
## [181]  66189  66130  65870  65605  65498  65235  65005  64903  64487  64455
## [191]  64148  63538  63209  62835  62658  62417  62392  62254  61498  61430
## [201]  61207  61063  60681  60629  60360  60217  60128  59707  59696  59204
## [211]  59183  59092  58889  58885  58815  58441  58290  58101  57747  57715
## [221]  57628  57549  57545  57507  57410  57288  57174  57018  56788  56743
## [231]  56651  56602  56415  55951  55567  55458  55030  54784  54057  54052
## [241]  54037  53875  53735  53593  53386  53309  53128  52842  52786  52492
## [251]  52307  52259  52138  51991  51852  51816  51613  51278  50376  50178
## [261]  49858  49740  49124  49074  48498  48266  47863  47748  47660  47554
## [271]  47315  47225  47215  46871  46858  46463  45742  45275  44661  43966
## [281]  42468  41632  41621  40680  40236  39338  39035  37994  35673  34587
## [291]  34068  33893  32864  32181  30884  30826  26795  25555    284

Filter and discard all samples with less than 5000 reads

# we are going to trash all the samples below 5,000. to make sure we take the best samples.
## FILTER OUT SAMPLES BELOW 5000 reads
fungi.obj1_5000reads <- prune_samples(sample_sums(fungi.obj1) > 5000, fungi.obj1) %>%
  phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE)

Reads obtained

sum(taxa_sums(fungi.obj1_5000reads)) # 22,957,826
## [1] 22957826
#Final total for fungi - 22,957,826 reads across 298 (you got this number from:"fungi.obj1_5000reads") samples 

mean(sample_sums(fungi.obj1_5000reads)) # 77,039
## [1] 77039.68
median(sample_sums(fungi.obj1_5000reads)) # 72,526
## [1] 72526

We can use this function to save everything we run up-till now

# Save an object to a file
saveRDS(fungi.obj1_5000reads, file = "Fungi_peanut_soil_nonorm_041723.rds")

Restore the object you can start from here!!

# Restore the object. you can start from here!!
fungi.no.norm <- readRDS(file = "Fungi_peanut_soil_nonorm_041723.rds")

Rarefaction analysis

-Rarefaction is a technique to assess species richness from the results of sampling.

-Thus, allows the calculation of species richness for a given number of individual samples by using the rarefaction curve

-Rarefaction curve is a plot of the number of species as a function of the number of samples.

-Rarefaction curves generally grow rapidly at first, as the most common species are found, but the curves plateau as only the rarest species remain to be sampled.

## Rarefaction analysis
sam.data <- data.frame(fungi.no.norm@sam_data)
fOTU.table <- fungi.no.norm@otu_table
S <- specnumber(t(fOTU.table)) # observed number of species
raremax <- min(rowSums(t(fOTU.table)))
Srare <- rarefy(t(fOTU.table), raremax)
#chart name rarefaction_1
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
abline(0, 1)

rare.fun <- rarecurve(t(fOTU.table), step = 1000, sample = raremax, col = "blue", cex = 0.6)

fungi.rare.curve.extract <- NULL
for(i in 1:length(rare.fun)){
  sample.200 <- data.frame(rare.spec = rare.fun[[i]])
  sample.200$read_depth <- attr(rare.fun[[i]], "Subsample")
  sample.200$Sample <- rownames(t(fOTU.table[,i]))
  fungi.rare.curve.extract <- rbind.data.frame(fungi.rare.curve.extract, sample.200)
}
fungi.rare.curve.extract2 <- left_join(sam.data, fungi.rare.curve.extract, by = "Sample")

fungi.rare <- ggplot(fungi.rare.curve.extract2, aes(x = read_depth, y = rare.spec, group = Sample)) + 
  #geom_point() +
  geom_line() + 
  xlab("Reads") + 
  ylab("Number of OTUs") + 
  theme_classic() + 
  geom_vline(xintercept = 72526, linetype = "dashed") +
  ggtitle("Fungi") 
fungi.rare

Normalize based on cumulative sum scaling & phyloseq object

MGS <- phyloseq_to_metagenomeSeq(fungi.no.norm)
## Loading required namespace: metagenomeSeq
p <- metagenomeSeq::cumNormStatFast(MGS)
## Default value being used.
MGS <- metagenomeSeq::cumNorm(MGS, p =p)
metagenomeSeq::normFactors(MGS) # exports the normalized factors for each sample
## SoilAT2R1W0 SoilAT3R1W0 SoilBT5R1W0 SoilAT5R1W0 SoilBT3R1W0 SoilBT1R1W0 
##         674        1900        1953        2448        1938        2363 
## SoilBT4R1W0 SoilAT1R1W0 SoilBT2R1W0 SoilAT4R2W0 SoilAT5R2W0 SoilBT2R2W0 
##        1219        1620        1213        1162        1369        2736 
## SoilBT1R2W0 SoilAT1R2W0 SoilBT3R2W0 SoilAT3R2W0 SoilAT2R2W0 SoilBT5R2W0 
##        2172        2966        1890         912         880        1504 
## SoilBT4R2W0 SoilBT2R3W0 SoilAT3R3W0 SoilAT5R3W0 SoilBT5R3W0 SoilBT1R3W0 
##        2143        1603         974        1147         871        2329 
## SoilAT4R3W0 SoilAT1R3W0 SoilBT3R3W0 SoilBT4R3W0 SoilAT2R3W0 SoilBT5R4W0 
##        2491        2284        2255         939         761        2069 
## SoilAT1R4W0 SoilAT2R4W0 SoilAT5R4W0 SoilBT3R4W0 SoilBT4R4W0 SoilBT1R4W0 
##        1436        1952        2603        2041        1763        2150 
## SoilAT3R4W0 SoilAT4R4W0 SoilBT2R4W0 SoilAT1R5W0 SoilBT1R5W0 SoilBT4R5W0 
##        1096        2109        2047        1292         976        1244 
## SoilAT5R5W0 SoilBT2R5W0 SoilAT4R5W0 SoilAT2R5W0 SoilAT3R5W0 SoilBT3R5W0 
##        3069        2073        1591        1560         962        1736 
## SoilBT5R5W0 SoilBT4R6W0 SoilAT3R6W0 SoilAT1R6W0 SoilAT4R6W0 SoilBT3R6W0 
##        1963        2527        1544        1698        1465        2085 
## SoilAT2R6W0 SoilBT5R6W0 SoilAT5R6W0 SoilBT1R6W0 SoilBT2R6W0 SoilAT2R1W2 
##        2610        1649        1638        2401        1895        1367 
## SoilAT3R1W2 SoilBT5R1W2 SoilAT5R1W2 SoilAT4R1W2 SoilBT3R1W2 SoilBT1R1W2 
##        2015        1277        1066        2268        1732        1485 
## SoilBT4R1W2 SoilAT1R1W2 SoilBT2R1W2 SoilAT4R2W2 SoilAT5R2W2 SoilBT2R2W2 
##        1429        1154        1914        1426        1172        1977 
## SoilBT1R2W2 SoilAT1R2W2 SoilBT3R2W2 SoilAT3R2W2 SoilAT2R2W2 SoilBT5R2W2 
##        1433        2261        3728         990        1347        1149 
## SoilBT4R2W2 SoilBT2R3W2 SoilAT3R3W2 SoilAT5R3W2 SoilBT5R3W2 SoilBT1R3W2 
##        1297        1718        1235        1663        1418        1925 
## SoilAT4R3W2 SoilAT1R3W2 SoilBT3R3W2 SoilBT4R3W2 SoilAT2R3W2 SoilBT5R4W2 
##        1096         628        1921        1193        1222        1238 
## SoilAT1R4W2 SoilAT2R4W2 SoilAT5R4W2 SoilBT3R4W2 SoilBT4R4W2 SoilBT1R4W2 
##        1475         746        1251        1847        1899        1868 
## SoilAT3R4W2 SoilAT4R4W2 SoilBT2R4W2 SoilAT1R5W2 SoilBT1R5W2 SoilBT4R5W2 
##        1855        2010        2388        1889        1141        1963 
## SoilAT5R5W2 SoilBT2R5W2 SoilAT4R5W2 SoilAT2R5W2 SoilAT3R5W2 SoilBT3R5W2 
##        2214        1349        1627        2616        2016        2225 
## SoilBT5R5W2 SoilBT4R6W2 SoilAT3R6W2 SoilAT1R6W2 SoilAT4R6W2 SoilBT3R6W2 
##        1828         886         998        2191        1264        1957 
## SoilAT2R6W2 SoilBT5R6W2 SoilAT5R6W2 SoilBT1R6W2 SoilBT2R6W2 SoilAT2R1W5 
##        2566        1495         564        2130        1868         476 
## SoilAT3R1W5 SoilBT5R1W5 SoilAT5R1W5 SoilAT4R1W5 SoilBT3R1W5 SoilBT1R1W5 
##         748         760         616         503         953        1189 
## SoilBT4R1W5 SoilAT1R1W5 SoilBT2R1W5 SoilAT4R2W5 SoilAT5R2W5 SoilBT2R2W5 
##        2290        1284        2331         459         584         753 
## SoilBT1R2W5 SoilAT1R2W5 SoilBT3R2W5 SoilAT3R2W5 SoilAT2R2W5 SoilBT5R2W5 
##        1307         797         747         975         852         659 
## SoilBT4R2W5 SoilBT2R3W5 SoilAT3R3W5 SoilAT5R3W5 SoilBT5R3W5 SoilBT1R3W5 
##         976         814         658        1073        1425        1061 
## SoilAT4R3W5 SoilAT1R3W5 SoilBT3R3W5 SoilBT4R3W5 SoilAT2R3W5 SoilBT5R4W5 
##        1112        1609        1026        1628        1734         697 
## SoilAT1R4W5 SoilAT2R4W5 SoilAT5R4W5 SoilBT3R4W5 SoilBT4R4W5 SoilBT1R4W5 
##         672         486         582         949         639        1300 
## SoilAT3R4W5 SoilAT4R4W5 SoilBT2R4W5 SoilAT1R5W5 SoilBT1R5W5 SoilBT4R5W5 
##         575         623         508        1469        1219        1314 
## SoilAT5R5W5 SoilBT2R5W5 SoilAT4R5W5 SoilAT2R5W5 SoilAT3R5W5 SoilBT3R5W5 
##        1610        1633        1658        1729         739        1408 
## SoilBT5R5W5 SoilBT4R6W5 SoilAT1R6W5 SoilAT4R6W5 SoilBT3R6W5 SoilAT2R6W5 
##         831         799        1222         818        1832        1219 
## SoilBT5R6W5 SoilAT5R6W5 SoilBT1R6W5 SoilBT2R6W5 SoilAT2R1W7 SoilAT3R1W7 
##         726         793         844        1084        1659         820 
## SoilBT5R1W7 SoilAT5R1W7 SoilAT4R1W7 SoilBT3R1W7 SoilBT1R1W7 SoilBT4R1W7 
##        1441        1636        1701        2870        1975        1928 
## SoilAT1R1W7 SoilBT2R1W7 SoilAT4R2W7 SoilAT5R2W7 SoilBT2R2W7 SoilBT1R2W7 
##        1739        2306        1311         704        1230        1930 
## SoilAT1R2W7 SoilBT3R2W7 SoilAT3R2W7 SoilAT2R2W7 SoilBT5R2W7 SoilBT4R2W7 
##        1299        1332        2259         891        1166        3457 
## SoilBT2R3W7 SoilAT3R3W7 SoilAT5R3W7 SoilBT5R3W7 SoilBT1R3W7 SoilAT4R3W7 
##        1826        1153        2032        1535        2189        1402 
## SoilAT1R3W7 SoilBT3R3W7 SoilBT4R3W7 SoilAT2R3W7 SoilBT5R4W7 SoilAT1R4W7 
##        1728        1538        1943        2687        1772         640 
## SoilAT2R4W7 SoilAT5R4W7 SoilBT3R4W7 SoilBT4R4W7 SoilBT1R4W7 SoilAT3R4W7 
##         480         877        2572        1781        2137        1021 
## SoilAT4R4W7 SoilBT2R4W7 SoilAT1R5W7 SoilBT1R5W7 SoilBT4R5W7 SoilAT5R5W7 
##         897        2097        3071        2326        1330         917 
## SoilBT2R5W7 SoilAT4R5W7 SoilAT2R5W7 SoilAT3R5W7 SoilBT3R5W7 SoilBT5R5W7 
##        1773         907        1180        1363        2766         730 
## SoilBT4R6W7 SoilAT3R6W7 SoilAT1R6W7 SoilAT4R6W7 SoilBT3R6W7 SoilAT2R6W7 
##         831         811        2347         784        2047         776 
## SoilBT5R6W7 SoilAT5R6W7 SoilBT1R6W7 SoilBT2R6W7 SoilAT2R1W9 SoilAT3R1W9 
##        1014         740         747        1665         697        1212 
## SoilBT5R1W9 SoilAT5R1W9 SoilAT4R1W9 SoilBT3R1W9 SoilBT1R1W9 SoilBT4R1W9 
##        1470        1461        1223        1151        1148        1608 
## SoilAT1R1W9 SoilBT2R1W9 SoilAT4R2W9 SoilAT5R2W9 SoilBT2R2W9 SoilBT1R2W9 
##         623        1235        1634        1276        1233        1533 
## SoilAT1R2W9 SoilBT3R2W9 SoilAT3R2W9 SoilAT2R2W9 SoilBT5R2W9 SoilBT4R2W9 
##        1777        1527        1640        1188        1871        1604 
## SoilBT2R3W9 SoilAT3R3W9 SoilAT5R3W9 SoilBT5R3W9 SoilBT1R3W9 SoilAT4R3W9 
##         873        2837        1466        1445        1479        1215 
## SoilAT1R3W9 SoilBT3R3W9 SoilBT4R3W9 SoilAT2R3W9 SoilBT5R4W9 SoilAT1R4W9 
##        2600        2272        1865        1744        1329         384 
## SoilAT2R4W9 SoilAT5R4W9 SoilBT3R4W9 SoilBT4R4W9 SoilBT1R4W9 SoilAT3R4W9 
##        1693        1160        1505        1633        1684        1738 
## SoilAT4R4W9 SoilBT2R4W9 SoilAT1R5W9 SoilBT1R5W9 SoilBT4R5W9 SoilAT5R5W9 
##        1811        1986        1342        1445        1730        1266 
## SoilBT2R5W9 SoilAT4R5W9 SoilAT2R5W9 SoilAT3R5W9 SoilBT3R5W9 SoilBT5R5W9 
##        3322        2445        1197         957        2518        1903 
## SoilBT4R6W9 SoilAT3R6W9 SoilAT1R6W9 SoilAT4R6W9 SoilBT3R6W9 SoilAT2R6W9 
##        1024        1199        1314         830        1273        1402 
## SoilBT5R6W9 SoilAT5R6W9 SoilBT1R6W9 SoilBT2R6W9 
##        1141        1040         802        2868
norm.fungi <- metagenomeSeq::MRcounts(MGS, norm = T)
norm.fungi.OTU <- phyloseq::otu_table(norm.fungi, taxa_are_rows = TRUE)

fungi.css.norm <- phyloseq::phyloseq(norm.fungi.OTU, TAX.fungi.unite, FASTA.fungi, SAMP.fungi)

We save again.

## fungi CSS NORM RDS
#SAVE the fungi phyloseq object as an RDS file to load faster in future.
# Save an object to a file
saveRDS(fungi.css.norm, file = "Fungi_peanut_soil_CSS_041723.rds")

Now we don’t have to run above, we would just have to run this and continue from here!

# Restore the object
fungi.css.norm <- readRDS(file = "Fungi_peanut_soil_CSS_041723.rds")

Beta diveristy (Sample dissimilarity)

Bray-curtis distance matrix (Principal Coordinate analysis “PCoA”)

# Beta diversity 
fungi.dist.bray = phyloseq::distance(fungi.css.norm, "bray") # create bray-curtis distance matrix
fungi.ord <- ordinate(fungi.css.norm, "PCoA", "bray")
global.nmds <- plot_ordination(fungi.css.norm, ordination = fungi.ord, type = "samples") 
global.nmds.data <- global.nmds$data
#takes a long time to run
adonis2(fungi.dist.bray~Soil*as.factor(week)*as.factor(Treatment), as(sample_data(fungi.css.norm), "data.frame"), permutations = 9999) 
#beta diversity plot: Principal coordinate chart
ggplot() + 
  geom_point(data = global.nmds.data, aes(x = Axis.1, y = Axis.2, shape = as.factor(Treatment), fill = as.factor(week)), alpha = 0.8, size = 2) +
  theme_bw() +
  ylab("PCoA2") + 
  xlab("PCoA1") +
  scale_fill_manual(values=cbbPalette) +
  stat_ellipse(data = global.nmds.data, aes(x = Axis.1, y = Axis.2, group = Soil), type = "norm", linetype = 2) +
  scale_shape_manual(values=c(21, 22, 23, 24, 25)) +
  guides(fill=guide_legend(override.aes=list(shape=21))) 

Relative abundance Aspergillus flavus

-Shannon test

-Simpson test

Then we did Relative abundance of A. flavus and a grapth that depicts how A. falvus relative abundance move through the weeks. It is actually pretty low abundance but we have some spikes, we do not have an explanation, but if we take it out the outlier visibly will be better.

Some work with Aspergillus flavus: -we sub-set for this specie to obtain richness that is the number of A. flavus in the soil we sampled and we plot it (basically compare) -Shannon test -Simpson test result: phyloseq-class experiment-level object otu_table() OTU Table: [ 5 taxa and 298 samples ] sample_data() Sample Data: [ 298 samples by 23 sample variables ] tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ] refseq() DNAStringSet: [ 5 reference sequences ]

#sub-set A. flavus
aspergillus <- fungi.no.norm %>% subset_taxa(Species == "Aspergillus_flavus")
#compare aspergillus by Alpha diversity = richness (# of species in area)
# alpha diversity is the mean species diversity in a site at a local scale#
fungi.no.norm@sam_data$shannon <- estimate_richness(fungi.no.norm, measures=c("Shannon"))$Shannon
fungi.no.norm@sam_data$chao <- estimate_richness(fungi.no.norm, measures=c("Chao1"))$Chao1
fungi.no.norm@sam_data$invsimpson <- estimate_richness(fungi.no.norm, measures=c("InvSimpson"))$InvSimpson
fungi.no.norm@sam_data$sequence_depth <- as.numeric(sample_sums(fungi.no.norm))
fungi.no.norm@sam_data$richness <- estimate_richness(fungi.no.norm, measures=c("Observed"))$Observed
fungi.no.norm@sam_data$even <- fungi.no.norm@sam_data$shannon/log(fungi.no.norm@sam_data$richness)

  #Relative abundance Aspergillus flavus
  A.flavusrelativeabundance <- fungi.no.norm %>% 
    transform_sample_counts(function(x) x / sum(x) ) %>%
    psmelt()%>%
    subset(Species == "Aspergillus_flavus")
## Warning in psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names: 
## OTU
##  have been renamed to: 
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
  ggplot(A.flavusrelativeabundance[A.flavusrelativeabundance$Abundance < 0.04,], aes(x = as.factor(week), y = Abundance, color = as.factor(Treatment))) + 
  stat_summary(fun.y=mean,geom="line", aes(group = Treatment)) + 
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  #geom_jitter()+
  theme_classic() +
  ylab("Relative Abundance (%)") +
  xlab("") +
  scale_color_manual(values = cbbPalette) +
  scale_y_continuous(labels = scales::percent) +
  labs(fill = "Treatment") +
  theme(legend.text = element_text(size = 10),
        legend.key = element_blank(),
        legend.title = element_text(size = 10),
        legend.position = "right", 
        strip.text.x = element_text(size = 10, vjust=2),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~Soil, scales = "free")
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.

THIS IS WRONG, LEARN FROM IT!

this plot looks really good but is wrong because is stacking the abundance and suming it so is not really relative abundance. That’s why the y values are so high. Keep in mind.

ggplot(A.flavusrelativeabundance, aes(x = as.factor(week), y = Abundance, fill = as.factor(Treatment))) + 
  #facet_wrap(~week, nrow = 1, scales = "free_x", strip.position="bottom") +
  geom_bar(stat = "identity", alpha = 0.9) +
  theme_minimal() +
  ylab("Relative Abundance (%)") +
  xlab("") +
  scale_fill_manual(values = sample(fungi.colors)) +
  scale_y_continuous(labels = scales::percent) +
  labs(fill = "Treatment") +
  theme(axis.text.x = element_blank(),
        legend.text = element_text(size = 10),
        legend.key = element_blank(),
        legend.title = element_text(size = 10),
        legend.position = "right", 
        strip.text.x = element_text(size = 10, vjust=2),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())

Top 20 fungi prevalent peanut soils

We filter for soil A and soil B. Verify they have the same 20 prevalent fungi (is it weird knowing the soils were really different in the Principal coordinate chart)

set.seed(12348)
topx.fungi <- top_taxa(fungi.no.norm, n = 20) 
fung.composition <- fungi.no.norm %>%   
  subset_taxa(OTU %in% topx.fungi) %>%   
  microbiome::transform("compositional") %>%  
  psmelt() %>%   
  group_by(Treatment, Soil, Label) %>%   
  summarise(MeanRelAbund = mean(Abundance)) %>%  
  left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%   
  ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +   
  geom_bar(stat = "identity") +   
  theme_classic() +   
  scale_fill_manual(values= c(cbbPalette, fungi.colors)) +   
  scale_y_continuous(labels = scales::percent) +   
  labs(x = "", y = "Relative abundance (%)",        title = "Fungi") +   
  theme(axis.text.x = element_text(angle=45, hjust=1),        
        legend.text = element_text(face = "italic", size = 5),        
        legend.title = element_blank(),         
        legend.key.size = unit(0.3, 'cm')) 
## Warning in psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names: 
## OTU
##  have been renamed to: 
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment', 'Soil'. You can override using the `.groups` argument.
## Joining, by = "Label"
  #facet_wrap(~Soil, nrow = 1) 
fung.composition

Top 20 fungi prevalent in both peanut soils

This is to verify if running them separated would change the OTUs

fungi.no.norm.A <- fungi.no.norm %>%
  subset_samples(Soil == "A") %>%
  phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) 
#soil A
set.seed(12348)
topx.fungi.A <- top_taxa(fungi.no.norm.A, n = 20)
fungi.composition.A <- fungi.no.norm %>% 
  subset_taxa(OTU %in% topx.fungi) %>% 
  microbiome::transform("compositional") %>%  
  psmelt() %>%  
  group_by(Treatment, Label) %>%   
  summarise(MeanRelAbund = mean(Abundance)) %>%  
  left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%   
  ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +   
  geom_bar(stat = "identity") +   
  theme_classic() +   
  scale_fill_manual(values= c(cbbPalette, fungi.colors)) +   
  scale_y_continuous(labels = scales::percent) +   
  labs(x = "", y = "Relative abundance (%)",        title = "Fungi") +   
  theme(axis.text.x = element_text(angle=45, hjust=1),        
        legend.text = element_text(face = "italic", size = 5),        
        legend.title = element_blank(),         
        legend.key.size = unit(0.3, 'cm'))  
## Warning in psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names: 
## OTU
##  have been renamed to: 
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment'. You can override using the `.groups` argument.
## Joining, by = "Label"
#facet_wrap(~Soil, nrow = 1) 
fungi.composition.A

fungi.no.norm.B <- fungi.no.norm %>%
  subset_samples(Soil == "B") %>%
  phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) 
#Filter for soil B
set.seed(12348)
topx.fungi.B <- top_taxa(fungi.no.norm.B, n = 20)
fungi.composition.B <- fungi.no.norm %>% 
  subset_taxa(OTU %in% topx.fungi) %>% 
  microbiome::transform("compositional") %>%  
  psmelt() %>%  
  #filter(Soil == "A") %>%
  group_by(Treatment, Label) %>%   
  summarise(MeanRelAbund = mean(Abundance)) %>%  
  left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%   
  ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +   
  geom_bar(stat = "identity") +   
  theme_classic() +   
  scale_fill_manual(values= c(cbbPalette, fungi.colors)) +   
  scale_y_continuous(labels = scales::percent) +   
  labs(x = "", y = "Relative abundance (%)",        title = "Fungi") +   
  theme(axis.text.x = element_text(angle=45, hjust=1),        
        legend.text = element_text(face = "italic", size = 5),        
        legend.title = element_blank(),         
        legend.key.size = unit(0.3, 'cm'))  
## Warning in psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.

## Warning in psmelt(.): The rank names: 
## OTU
##  have been renamed to: 
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment'. You can override using the `.groups` argument.
## Joining, by = "Label"
#facet_wrap(~Soil, nrow = 1) 
fungi.composition.B

ggarrange(fungi.composition.A, fungi.composition.B, common.legend = T, labels = c("A", "B"))

Relative abundance of Mortierella species

#Relative abundance for Mortierella
#the fungi.no.norm is a phyloseq object, we need to transformed the data to relative abundance(because that's what we want for our project),
#then transform it to a data frame using psmelt(), with this, then, you can subset to A. flavus using subset(Species == "Aspergillus_flavus")
#1. you can not use the fungi.no.norm bc is not normalized, before transforming it to a data frame, do relative abundance.
mortierellaabundance <- fungi.no.norm %>% 
  #transforme into the relative abundance
  transform_sample_counts(function(x) x / sum(x) ) %>%
  #now you have relative abundance! but still in phyloseq, so, transform by usinh psmelt(). The output will be a data frame.
  #psmelt transform phyloseq to data frame
  psmelt()%>%
  #sub-set to aspergillus
  subset(Family == "Mortierellaceae")
## Warning in psmelt(.): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names: 
## OTU
##  have been renamed to: 
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
#line chart
ggplot(mortierellaabundance[mortierellaabundance$Abundance < 0.04,], aes(x = as.factor(week), y = Abundance, color = as.factor(Treatment))) + 
  stat_summary(fun.y=mean,geom="line", aes(group = Treatment)) + 
  stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
  #geom_jitter()+
  theme_classic() +
  ylab("Relative Abundance (%)") +
  xlab("") +
  scale_color_manual(values = cbbPalette) +
  scale_y_continuous(labels = scales::percent) +
  labs(fill = "Treatment") +
  theme(legend.text = element_text(size = 10),
        legend.key = element_blank(),
        legend.title = element_text(size = 10),
        legend.position = "right", 
        strip.text.x = element_text(size = 10, vjust=2),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  facet_wrap(~Soil, scales = "free")

Differential abundance analysis fungi

# Perform indicator species analysis just considering the original groups
set.seed(12348)
indicator.management <- indicspecies::multipatt(as.data.frame(t(fungi.css.norm@otu_table)), cluster = fungi.css.norm@sam_data$Treatment, max.order = 1)

# summary of results
summary(indicator.management, indvalcomp = TRUE)
## 
##  Multilevel pattern analysis
##  ---------------------------
## 
##  Association function: IndVal.g
##  Significance level (alpha): 0.05
## 
##  Total number of species: 3088
##  Selected number of species: 87 
##  Number of species associated to 1 group: 87 
##  Number of species associated to 2 groups: 0 
##  Number of species associated to 3 groups: 0 
##  Number of species associated to 4 groups: 0 
## 
##  List of species associated to each combination: 
## 
##  Group 1  #sps.  20 
##                 A       B  stat p.value   
## FOTU_25   0.39273 0.96667 0.616   0.005 **
## FOTU_789  0.37705 0.96667 0.604   0.005 **
## FOTU_314  0.64079 0.41667 0.517   0.010 **
## FOTU_4755 0.29504 0.58333 0.415   0.015 * 
## FOTU_580  0.30176 0.56667 0.414   0.025 * 
## FOTU_5030 0.28745 0.50000 0.379   0.050 * 
## FOTU_690  0.46742 0.28333 0.364   0.025 * 
## FOTU_942  0.68503 0.16667 0.338   0.040 * 
## FOTU_835  0.36723 0.28333 0.323   0.030 * 
## FOTU_1247 0.41580 0.23333 0.311   0.030 * 
## FOTU_1071 0.45510 0.20000 0.302   0.035 * 
## FOTU_1620 0.84811 0.08333 0.266   0.010 **
## FOTU_554  0.68383 0.10000 0.262   0.025 * 
## FOTU_5516 0.97034 0.06667 0.254   0.040 * 
## FOTU_1404 0.76868 0.08333 0.253   0.005 **
## FOTU_2279 0.62090 0.08333 0.227   0.040 * 
## FOTU_1661 1.00000 0.05000 0.224   0.045 * 
## FOTU_1927 1.00000 0.05000 0.224   0.025 * 
## FOTU_2576 1.00000 0.05000 0.224   0.050 * 
## FOTU_3633 0.87866 0.05000 0.210   0.040 * 
## 
##  Group 2  #sps.  15 
##                 A       B  stat p.value   
## FOTU_132  0.31032 0.61667 0.437   0.035 * 
## FOTU_3590 0.85175 0.15000 0.357   0.005 **
## FOTU_2046 0.50300 0.20000 0.317   0.005 **
## FOTU_2191 0.50260 0.20000 0.317   0.005 **
## FOTU_1285 0.40682 0.21667 0.297   0.050 * 
## FOTU_4901 0.93891 0.08333 0.280   0.025 * 
## FOTU_1219 0.77966 0.08333 0.255   0.010 **
## FOTU_1288 0.93040 0.06667 0.249   0.045 * 
## FOTU_1094 0.58148 0.10000 0.241   0.045 * 
## FOTU_5764 0.57903 0.10000 0.241   0.045 * 
## FOTU_3427 0.85818 0.06667 0.239   0.025 * 
## FOTU_3044 0.75190 0.06667 0.224   0.040 * 
## FOTU_2575 1.00000 0.05000 0.224   0.030 * 
## FOTU_4494 0.68777 0.06667 0.214   0.025 * 
## FOTU_3673 0.80987 0.05000 0.201   0.025 * 
## 
##  Group 3  #sps.  18 
##                 A       B  stat p.value   
## FOTU_23   0.74437 0.55932 0.645   0.015 * 
## FOTU_793  0.73724 0.52542 0.622   0.005 **
## FOTU_1365 0.51918 0.66102 0.586   0.045 * 
## FOTU_4235 0.62591 0.40678 0.505   0.015 * 
## FOTU_442  0.47203 0.33898 0.400   0.025 * 
## FOTU_930  0.35251 0.38983 0.371   0.015 * 
## FOTU_1148 0.39896 0.30508 0.349   0.005 **
## FOTU_3555 0.64358 0.11864 0.276   0.010 **
## FOTU_1291 0.60201 0.11864 0.267   0.040 * 
## FOTU_1556 1.00000 0.06780 0.260   0.005 **
## FOTU_4605 1.00000 0.06780 0.260   0.005 **
## FOTU_1030 0.64512 0.10169 0.256   0.005 **
## FOTU_5023 0.94949 0.06780 0.254   0.015 * 
## FOTU_2393 0.87570 0.06780 0.244   0.015 * 
## FOTU_2433 0.86066 0.06780 0.242   0.005 **
## FOTU_2658 0.52599 0.10169 0.231   0.045 * 
## FOTU_459  1.00000 0.05085 0.225   0.010 **
## FOTU_4175 0.63309 0.06780 0.207   0.020 * 
## 
##  Group 4  #sps.  12 
##                 A       B  stat p.value   
## FOTU_829  0.39916 0.72881 0.539   0.035 * 
## FOTU_940  0.77158 0.13559 0.323   0.005 **
## FOTU_4311 0.82342 0.11864 0.313   0.035 * 
## FOTU_409  0.80693 0.11864 0.309   0.045 * 
## FOTU_3634 0.63843 0.10169 0.255   0.025 * 
## FOTU_4719 0.52891 0.11864 0.251   0.030 * 
## FOTU_2663 0.58529 0.10169 0.244   0.015 * 
## FOTU_4082 0.60744 0.08475 0.227   0.025 * 
## FOTU_5149 1.00000 0.05085 0.225   0.010 **
## FOTU_484  0.95762 0.05085 0.221   0.040 * 
## FOTU_4568 0.89297 0.05085 0.213   0.045 * 
## FOTU_2815 0.87005 0.05085 0.210   0.025 * 
## 
##  Group 5  #sps.  22 
##                 A       B  stat p.value   
## FOTU_3095 0.29708 0.98333 0.540   0.045 * 
## FOTU_756  0.49238 0.51667 0.504   0.015 * 
## FOTU_615  0.54071 0.38333 0.455   0.005 **
## FOTU_387  0.58232 0.35000 0.451   0.015 * 
## FOTU_1097 0.39511 0.45000 0.422   0.005 **
## FOTU_675  0.76954 0.21667 0.408   0.015 * 
## FOTU_5042 0.76752 0.21667 0.408   0.015 * 
## FOTU_535  0.35605 0.45000 0.400   0.035 * 
## FOTU_656  0.30699 0.45000 0.372   0.045 * 
## FOTU_1372 0.50904 0.26667 0.368   0.015 * 
## FOTU_663  0.76694 0.16667 0.358   0.030 * 
## FOTU_3137 0.54768 0.20000 0.331   0.005 **
## FOTU_1753 0.60186 0.16667 0.317   0.015 * 
## FOTU_1492 0.90335 0.08333 0.274   0.020 * 
## FOTU_4119 0.52046 0.13333 0.263   0.015 * 
## FOTU_1571 0.75782 0.08333 0.251   0.005 **
## FOTU_1410 0.72051 0.08333 0.245   0.040 * 
## FOTU_5051 0.50215 0.10000 0.224   0.050 * 
## FOTU_2013 1.00000 0.05000 0.224   0.025 * 
## FOTU_3777 1.00000 0.05000 0.224   0.035 * 
## FOTU_2112 0.84307 0.05000 0.205   0.030 * 
## FOTU_4013 0.82590 0.05000 0.203   0.030 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
unite_taxonomy$OTU <- rownames(unite_taxonomy)
unite_taxonomy[unite_taxonomy$OTU == "BOTU_1527",]
# Explore some of these taxa, what are they? What might they do?

indicator.treatment <- indicator.management$sign
indicator.treatment2 <- indicator.treatment %>%
  subset(p.value < 0.01) %>%
  mutate(OTU = rownames(.))

indicator.treatment3 <- left_join(indicator.treatment2, unite_taxonomy, by = "OTU") 
indicator.treatment3$category <- ifelse(indicator.treatment3$index == 1, "Dry1", 
                                        ifelse(indicator.treatment3$index == 2, "Moderate2",
                                               ifelse(indicator.treatment3$index == 3, "Moderate3",
                                                      ifelse(indicator.treatment3$index == 4, "Moderate4",
                                                             ifelse(indicator.treatment3$index == 5, "Wet5", NA)))))

indicator.treatment4 <- indicator.treatment3 %>% 
  count(OTU) 
#indicator.treatment4$Phylum_other <- ifelse(indicator.treatment4$n < 10, "Other", indicator.treatment4$Phylum)  

indicator.treatment5 <- left_join(indicator.treatment3, indicator.treatment4, by = "OTU")

 q<- ggplot(indicator.treatment3, aes(x = category, fill = Label)) +
  geom_bar(position = "stack") +
  scale_fill_manual(values = c(cbbPalette, "purple", "brown", "grey", "pink", "red", "blue", "green", "cyan", "gold")) +
  theme_classic() +
  xlab("")
   #facet_wrap(~Soil, scales = "free")
  q + theme(axis.text.x = element_text(angle = 60, hjust = 1))

Core microbiome fungi

fungi.no.norm@sam_data$Sample
##   [1] "SoilAT2R1W0" "SoilAT3R1W0" "SoilBT5R1W0" "SoilAT5R1W0" "SoilBT3R1W0"
##   [6] "SoilBT1R1W0" "SoilBT4R1W0" "SoilAT1R1W0" "SoilBT2R1W0" "SoilAT4R2W0"
##  [11] "SoilAT5R2W0" "SoilBT2R2W0" "SoilBT1R2W0" "SoilAT1R2W0" "SoilBT3R2W0"
##  [16] "SoilAT3R2W0" "SoilAT2R2W0" "SoilBT5R2W0" "SoilBT4R2W0" "SoilBT2R3W0"
##  [21] "SoilAT3R3W0" "SoilAT5R3W0" "SoilBT5R3W0" "SoilBT1R3W0" "SoilAT4R3W0"
##  [26] "SoilAT1R3W0" "SoilBT3R3W0" "SoilBT4R3W0" "SoilAT2R3W0" "SoilBT5R4W0"
##  [31] "SoilAT1R4W0" "SoilAT2R4W0" "SoilAT5R4W0" "SoilBT3R4W0" "SoilBT4R4W0"
##  [36] "SoilBT1R4W0" "SoilAT3R4W0" "SoilAT4R4W0" "SoilBT2R4W0" "SoilAT1R5W0"
##  [41] "SoilBT1R5W0" "SoilBT4R5W0" "SoilAT5R5W0" "SoilBT2R5W0" "SoilAT4R5W0"
##  [46] "SoilAT2R5W0" "SoilAT3R5W0" "SoilBT3R5W0" "SoilBT5R5W0" "SoilBT4R6W0"
##  [51] "SoilAT3R6W0" "SoilAT1R6W0" "SoilAT4R6W0" "SoilBT3R6W0" "SoilAT2R6W0"
##  [56] "SoilBT5R6W0" "SoilAT5R6W0" "SoilBT1R6W0" "SoilBT2R6W0" "SoilAT2R1W2"
##  [61] "SoilAT3R1W2" "SoilBT5R1W2" "SoilAT5R1W2" "SoilAT4R1W2" "SoilBT3R1W2"
##  [66] "SoilBT1R1W2" "SoilBT4R1W2" "SoilAT1R1W2" "SoilBT2R1W2" "SoilAT4R2W2"
##  [71] "SoilAT5R2W2" "SoilBT2R2W2" "SoilBT1R2W2" "SoilAT1R2W2" "SoilBT3R2W2"
##  [76] "SoilAT3R2W2" "SoilAT2R2W2" "SoilBT5R2W2" "SoilBT4R2W2" "SoilBT2R3W2"
##  [81] "SoilAT3R3W2" "SoilAT5R3W2" "SoilBT5R3W2" "SoilBT1R3W2" "SoilAT4R3W2"
##  [86] "SoilAT1R3W2" "SoilBT3R3W2" "SoilBT4R3W2" "SoilAT2R3W2" "SoilBT5R4W2"
##  [91] "SoilAT1R4W2" "SoilAT2R4W2" "SoilAT5R4W2" "SoilBT3R4W2" "SoilBT4R4W2"
##  [96] "SoilBT1R4W2" "SoilAT3R4W2" "SoilAT4R4W2" "SoilBT2R4W2" "SoilAT1R5W2"
## [101] "SoilBT1R5W2" "SoilBT4R5W2" "SoilAT5R5W2" "SoilBT2R5W2" "SoilAT4R5W2"
## [106] "SoilAT2R5W2" "SoilAT3R5W2" "SoilBT3R5W2" "SoilBT5R5W2" "SoilBT4R6W2"
## [111] "SoilAT3R6W2" "SoilAT1R6W2" "SoilAT4R6W2" "SoilBT3R6W2" "SoilAT2R6W2"
## [116] "SoilBT5R6W2" "SoilAT5R6W2" "SoilBT1R6W2" "SoilBT2R6W2" "SoilAT2R1W5"
## [121] "SoilAT3R1W5" "SoilBT5R1W5" "SoilAT5R1W5" "SoilAT4R1W5" "SoilBT3R1W5"
## [126] "SoilBT1R1W5" "SoilBT4R1W5" "SoilAT1R1W5" "SoilBT2R1W5" "SoilAT4R2W5"
## [131] "SoilAT5R2W5" "SoilBT2R2W5" "SoilBT1R2W5" "SoilAT1R2W5" "SoilBT3R2W5"
## [136] "SoilAT3R2W5" "SoilAT2R2W5" "SoilBT5R2W5" "SoilBT4R2W5" "SoilBT2R3W5"
## [141] "SoilAT3R3W5" "SoilAT5R3W5" "SoilBT5R3W5" "SoilBT1R3W5" "SoilAT4R3W5"
## [146] "SoilAT1R3W5" "SoilBT3R3W5" "SoilBT4R3W5" "SoilAT2R3W5" "SoilBT5R4W5"
## [151] "SoilAT1R4W5" "SoilAT2R4W5" "SoilAT5R4W5" "SoilBT3R4W5" "SoilBT4R4W5"
## [156] "SoilBT1R4W5" "SoilAT3R4W5" "SoilAT4R4W5" "SoilBT2R4W5" "SoilAT1R5W5"
## [161] "SoilBT1R5W5" "SoilBT4R5W5" "SoilAT5R5W5" "SoilBT2R5W5" "SoilAT4R5W5"
## [166] "SoilAT2R5W5" "SoilAT3R5W5" "SoilBT3R5W5" "SoilBT5R5W5" "SoilBT4R6W5"
## [171] "SoilAT1R6W5" "SoilAT4R6W5" "SoilBT3R6W5" "SoilAT2R6W5" "SoilBT5R6W5"
## [176] "SoilAT5R6W5" "SoilBT1R6W5" "SoilBT2R6W5" "SoilAT2R1W7" "SoilAT3R1W7"
## [181] "SoilBT5R1W7" "SoilAT5R1W7" "SoilAT4R1W7" "SoilBT3R1W7" "SoilBT1R1W7"
## [186] "SoilBT4R1W7" "SoilAT1R1W7" "SoilBT2R1W7" "SoilAT4R2W7" "SoilAT5R2W7"
## [191] "SoilBT2R2W7" "SoilBT1R2W7" "SoilAT1R2W7" "SoilBT3R2W7" "SoilAT3R2W7"
## [196] "SoilAT2R2W7" "SoilBT5R2W7" "SoilBT4R2W7" "SoilBT2R3W7" "SoilAT3R3W7"
## [201] "SoilAT5R3W7" "SoilBT5R3W7" "SoilBT1R3W7" "SoilAT4R3W7" "SoilAT1R3W7"
## [206] "SoilBT3R3W7" "SoilBT4R3W7" "SoilAT2R3W7" "SoilBT5R4W7" "SoilAT1R4W7"
## [211] "SoilAT2R4W7" "SoilAT5R4W7" "SoilBT3R4W7" "SoilBT4R4W7" "SoilBT1R4W7"
## [216] "SoilAT3R4W7" "SoilAT4R4W7" "SoilBT2R4W7" "SoilAT1R5W7" "SoilBT1R5W7"
## [221] "SoilBT4R5W7" "SoilAT5R5W7" "SoilBT2R5W7" "SoilAT4R5W7" "SoilAT2R5W7"
## [226] "SoilAT3R5W7" "SoilBT3R5W7" "SoilBT5R5W7" "SoilBT4R6W7" "SoilAT3R6W7"
## [231] "SoilAT1R6W7" "SoilAT4R6W7" "SoilBT3R6W7" "SoilAT2R6W7" "SoilBT5R6W7"
## [236] "SoilAT5R6W7" "SoilBT1R6W7" "SoilBT2R6W7" "SoilAT2R1W9" "SoilAT3R1W9"
## [241] "SoilBT5R1W9" "SoilAT5R1W9" "SoilAT4R1W9" "SoilBT3R1W9" "SoilBT1R1W9"
## [246] "SoilBT4R1W9" "SoilAT1R1W9" "SoilBT2R1W9" "SoilAT4R2W9" "SoilAT5R2W9"
## [251] "SoilBT2R2W9" "SoilBT1R2W9" "SoilAT1R2W9" "SoilBT3R2W9" "SoilAT3R2W9"
## [256] "SoilAT2R2W9" "SoilBT5R2W9" "SoilBT4R2W9" "SoilBT2R3W9" "SoilAT3R3W9"
## [261] "SoilAT5R3W9" "SoilBT5R3W9" "SoilBT1R3W9" "SoilAT4R3W9" "SoilAT1R3W9"
## [266] "SoilBT3R3W9" "SoilBT4R3W9" "SoilAT2R3W9" "SoilBT5R4W9" "SoilAT1R4W9"
## [271] "SoilAT2R4W9" "SoilAT5R4W9" "SoilBT3R4W9" "SoilBT4R4W9" "SoilBT1R4W9"
## [276] "SoilAT3R4W9" "SoilAT4R4W9" "SoilBT2R4W9" "SoilAT1R5W9" "SoilBT1R5W9"
## [281] "SoilBT4R5W9" "SoilAT5R5W9" "SoilBT2R5W9" "SoilAT4R5W9" "SoilAT2R5W9"
## [286] "SoilAT3R5W9" "SoilBT3R5W9" "SoilBT5R5W9" "SoilBT4R6W9" "SoilAT3R6W9"
## [291] "SoilAT1R6W9" "SoilAT4R6W9" "SoilBT3R6W9" "SoilAT2R6W9" "SoilBT5R6W9"
## [296] "SoilAT5R6W9" "SoilBT1R6W9" "SoilBT2R6W9"
map <- fungi.no.norm@sam_data %>%
  as("data.frame")
# Core - abundance occupancy modeling- Peanut
core.prioritizing <- function(phyloseq.object){
  
  set.seed(19)
  rare.phyloseq.object <- rarefy_even_depth(phyloseq.object, replace=TRUE)
  
  nReads=sample_sums(rare.phyloseq.object)[[1]]            # input dataset needs to be rarified and the rarifaction depth included
  otu <- rare.phyloseq.object@otu_table %>%
    as("matrix")
  map <- rare.phyloseq.object@sam_data %>%
    as("data.frame")
  
  otu_PA <- 1*((otu>0)==1)                                               # presence-absence data
  otu_occ <- rowSums(otu_PA)/ncol(otu_PA)                                # occupancy calculation
  otu_rel <- apply(decostand(otu, method="total", MARGIN=2),1, mean)     # mean relative abundance
  occ_abun <- add_rownames(as.data.frame(cbind(otu_occ, otu_rel)),'otu') # combining occupancy and abundance data frame
  
  # Ranking OTUs based on their occupancy
  # For caluclating raking index we included following conditions:
  #   - time-specific occupancy (sumF) = frequency of detection within time point (genotype or site)
  #   - replication consistency (sumG) = has occupancy of 1 in at least one time point (genotype or site) (1 if occupancy 1, else 0)
  
  PresenceSum <- data.frame(otu = as.factor(row.names(otu)), otu) %>%
    gather(Sample, abun, -otu) %>%
    left_join(map, by = 'Sample') %>% #edit for sample id column in metadata
    group_by(otu, week) %>% #edit for time point column in metadata
    dplyr::summarise(time_freq=sum(abun>0)/length(abun),            # frequency of detection between time points
                     coreTime=ifelse(time_freq == 1, 1, 0)) %>%     # 1 only if occupancy 1 with specific time, 0 if not
    group_by(otu) %>%
    dplyr::summarise(sumF=sum(time_freq),
                     sumG=sum(coreTime),
                     nS=length(week)*2,  #edit for time point column in metadata        
                     Index=(sumF+sumG)/nS)                 # calculating weighting Index based on number of time points detected and
  
  otu_ranked <- occ_abun %>%
    left_join(PresenceSum, by='otu') %>%
    transmute(otu=otu,
              rank=Index) %>%
    arrange(desc(rank))
  
  # Calculating the contribution of ranked OTUs to the BC similarity
  BCaddition <- NULL
  
  # calculating BC dissimilarity based on the 1st ranked OTU
  # with 36 samples there should be 630 combinations n!/r!
  otu_start=otu_ranked$otu[1]                  
  start_matrix <- as.matrix(otu[otu_start,])
  start_matrix <- t(start_matrix)
  x <- apply(combn(ncol(start_matrix), 2), 2, function(x) sum(abs(start_matrix[,x[1]]- start_matrix[,x[2]]))/(2*nReads))
  x_names <- apply(combn(ncol(start_matrix), 2), 2, function(x) paste(colnames(start_matrix)[x], collapse=' - '))
  df_s <- data.frame(x_names,x)
  df_s$rank_count <- 1
  BCaddition <- rbind(BCaddition,df_s)
  # calculating BC dissimilarity based on additon of ranked OTUs from 2nd to 500th. Can be set to the entire length of OTUs in the dataset, however it might take some time if more than 5000 OTUs are included.
  for(i in 2:500){                              
    otu_add=otu_ranked$otu[i]                      
    add_matrix <- as.matrix(otu[otu_add,])
    add_matrix <- t(add_matrix)
    start_matrix <- rbind(start_matrix, add_matrix)
    x <- apply(combn(ncol(start_matrix), 2), 2, function(x) sum(abs(start_matrix[,x[1]]-start_matrix[,x[2]]))/(2*nReads))
    #x_names <- apply(combn(ncol(start_matrix), 2), 2, function(x) paste(colnames(start_matrix)[x], collapse=' - '))
    df_a <- data.frame(x_names,x)
    df_a$rank_count <- i 
    BCaddition <- rbind.data.frame(BCaddition, df_a)
  }
  # calculating the BC dissimilarity of the whole dataset (not needed if the second loop is already including all OTUs)
  x <-  apply(combn(ncol(otu), 2), 2, function(x) sum(abs(otu[,x[1]]-otu[,x[2]]))/(2*nReads))  
  x_names <- apply(combn(ncol(otu), 2), 2, function(x) paste(colnames(otu)[x], collapse=' - '))
  df_full <- data.frame(x_names,x)
  df_full$rank_count <- length(rownames(otu))
  BCfull <- rbind.data.frame(BCaddition, df_full)
  
  BC_ranked <- BCfull %>%
    group_by(rank_count) %>%
    dplyr::summarise(MeanBC=mean(x)) %>%            # mean Bray-Curtis dissimilarity
    arrange(desc(-MeanBC)) %>%
    mutate(proportionBC=MeanBC/max(MeanBC))   # proportion of the dissimilarity explained by the n number of ranked OTUs
  Increase=BC_ranked$MeanBC[-1]/BC_ranked$MeanBC[-length(BC_ranked$MeanBC)]
  increaseDF <- data.frame(IncreaseBC=c(0,(Increase)), rank=factor(c(1:(length(Increase)+1))))
  increaseDF$rank <- as.numeric(increaseDF$rank)
  BC_ranked <- left_join(BC_ranked, increaseDF, by = c("rank_count" = "rank"))
  BC_ranked <- BC_ranked[-nrow(BC_ranked),]
  
  #Creating threshold for core inclusion - last call method
  
  #B) Final increase in BC similarity of equal or greater then 2%
  lastCall <- last(as.numeric(BC_ranked$rank_count[(BC_ranked$IncreaseBC>=1.02)]))
  
  #Creating plot of Bray-Curtis similarity
  plot <- ggplot(BC_ranked[1:100,], aes(x=factor(BC_ranked$rank_count[1:100], levels=BC_ranked$rank_count[1:100]))) +
    geom_point(aes(y=proportionBC)) +
    theme_classic() + theme(strip.background = element_blank(),axis.text.x = element_text(size=7, angle=45)) +
    geom_vline(xintercept=last(as.numeric(BC_ranked$rank_count[(BC_ranked$IncreaseBC>=1.02)])), lty=3, col='black', cex=.5) +
    labs(x='ranked OTUs',y='Bray-Curtis similarity') +
    annotate(geom="text", x=last(as.numeric(BC_ranked$rank[(BC_ranked$IncreaseBC>=1.02)]))+3, y=.5, label=paste("Last 2% increase (",last(as.numeric(BC_ranked$rank[(BC_ranked$IncreaseBC>=1.02)])),")",sep=''), color="black")
  
  core.otus.CSS.mean.T1 <- otu_ranked$otu[1:lastCall]
  return_list <- list(core.otus.CSS.mean.T1, plot, otu_ranked, occ_abun)
  return(return_list)
}

#Takes a long time
fungi.core <- core.prioritizing(fungi.no.norm)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 96OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## ℹ Please use `tibble::rownames_to_column()` instead.
## `summarise()` has grouped output by 'otu'. You can override using the `.groups`
## argument.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Warning: Unknown or uninitialised column: `rank`.
## Unknown or uninitialised column: `rank`.

Save it so we don’t have to run such a long function

saveRDS(fungi.core, file = "fungi.no.norm.core_check_point01.11.23.rds")

Restore

#Restore the object
fungi.core <- readRDS(file = "fungi.no.norm.core_check_point01.11.23.rds")

Core graph

So, if this data I am using for the core does not include the soils, how can I include the soils that is not merge bc we have subseted a lot so I don’t want to add everything here. I would like to have soils and do a ggarange of the core microbiome for both to see if anything stands out.

fungi.core[[1]]
##   [1] "FOTU_1"    "FOTU_1091" "FOTU_16"   "FOTU_162"  "FOTU_1686" "FOTU_17"  
##   [7] "FOTU_2"    "FOTU_263"  "FOTU_3"    "FOTU_32"   "FOTU_3358" "FOTU_4156"
##  [13] "FOTU_468"  "FOTU_471"  "FOTU_5"    "FOTU_542"  "FOTU_5905" "FOTU_6"   
##  [19] "FOTU_67"   "FOTU_794"  "FOTU_886"  "FOTU_993"  "FOTU_1109" "FOTU_1606"
##  [25] "FOTU_512"  "FOTU_530"  "FOTU_1418" "FOTU_757"  "FOTU_4112" "FOTU_70"  
##  [31] "FOTU_8"    "FOTU_18"   "FOTU_79"   "FOTU_7"    "FOTU_1545" "FOTU_875" 
##  [37] "FOTU_1500" "FOTU_899"  "FOTU_1207" "FOTU_30"   "FOTU_58"   "FOTU_85"  
##  [43] "FOTU_54"   "FOTU_1360" "FOTU_63"   "FOTU_187"  "FOTU_10"   "FOTU_1630"
##  [49] "FOTU_1257" "FOTU_1909" "FOTU_1168" "FOTU_15"   "FOTU_64"   "FOTU_1627"
##  [55] "FOTU_968"  "FOTU_226"  "FOTU_2048" "FOTU_5628" "FOTU_36"   "FOTU_1861"
##  [61] "FOTU_924"  "FOTU_46"   "FOTU_1139" "FOTU_1018" "FOTU_78"   "FOTU_27"  
##  [67] "FOTU_98"   "FOTU_61"   "FOTU_24"   "FOTU_469"  "FOTU_39"   "FOTU_13"  
##  [73] "FOTU_789"  "FOTU_174"  "FOTU_2001" "FOTU_959"  "FOTU_25"   "FOTU_45"  
##  [79] "FOTU_232"  "FOTU_188"  "FOTU_5419" "FOTU_3185" "FOTU_2134" "FOTU_2878"
##  [85] "FOTU_29"   "FOTU_1051" "FOTU_4154" "FOTU_3095" "FOTU_147"  "FOTU_49"  
##  [91] "FOTU_888"  "FOTU_9"    "FOTU_467"  "FOTU_84"   "FOTU_124"  "FOTU_1413"
##  [97] "FOTU_1581" "FOTU_122"  "FOTU_1369" "FOTU_1249" "FOTU_119"  "FOTU_612" 
## [103] "FOTU_983"  "FOTU_1601" "FOTU_11"   "FOTU_77"   "FOTU_2261" "FOTU_31"  
## [109] "FOTU_129"  "FOTU_1619" "FOTU_190"  "FOTU_791"  "FOTU_4"    "FOTU_2132"
## [115] "FOTU_610"  "FOTU_111"  "FOTU_161"  "FOTU_332"
library(tyRa)
set.seed(19)
rare.phyloseq.object <- rarefy_even_depth(fungi.no.norm, replace=TRUE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 96OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...
nReads=sample_sums(rare.phyloseq.object)[[1]]                                                                 # input dataset needs to be rarified and the rarifaction depth included 
otu <- rare.phyloseq.object@otu_table %>%
  as("matrix")
taxa <- rownames(otu)
map <- rare.phyloseq.object@sam_data %>%
  as("data.frame")
spp.out <- tyRa::fit_sncm(spp = t(otu), pool=NULL, taxon=taxa)
## Waiting for profiling to be done...
predictions <- spp.out$predictions
predictions$otu <- rownames(predictions)

# Abundance-Occupancy
taxonomy <- fungi.no.norm@tax_table %>%
  as("matrix") %>%
  as_tibble() %>%
  mutate(otu = rownames(fungi.no.norm@tax_table))

abund.occ3 <- left_join(taxonomy, predictions, by = "otu") 

abund.occ3$core <- ifelse(abund.occ3$otu %in% fungi.core[[1]], "Core", "Not Core")

library(ggrepel)

core <- ggplot() +
  geom_point(data = abund.occ3, aes(x = log10(p), y = freq, color = fit_class, shape = core), alpha = 0.8, size = 2) +
  geom_line(color='black', data=abund.occ3, size=1, aes(y=abund.occ3$freq.pred, x=log10(abund.occ3$p)), alpha=.25) +
  geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.upr, x=log10(abund.occ3$p)), alpha=.25)+
  geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.lwr, x=log10(abund.occ3$p)), alpha=.25)+
  labs(x="log10(Mean relative abundance)", y="Occupancy") + 
  theme_classic() + 
  scale_color_manual(values = c("#000000", "#E69F00", "#56B4E9")) +
  geom_text_repel(data = abund.occ3[abund.occ3$core == "Core" & abund.occ3$fit_class == "Below prediction",], 
                  aes(x = log10(p), y = freq, label = Label))
plot(core)

Load packages and dependencies

library(gganimate)
library(gganimate)
library(ggplot2)
library(gganimate)
library('gifski')
library('png')
library(gapminder)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:XVector':
## 
##     slice
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:Hmisc':
## 
##     subplot
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(transformr)
library(gapminder)
library(ggplot2)
library(gganimate)
library('gifski')
library('png')

Interactive core graph:

coreInteractive <-ggplot() +
  geom_point(data = abund.occ3, aes(x = log10(p), y = freq,  shape = core, color = fit_class, text = paste("Species:", Species)), alpha = 0.8, size = 1) +
  #geom_point(aes(shape= "Species")) +
  geom_line(color='black', data=abund.occ3, size=1, aes(y=abund.occ3$freq.pred, x=log10(abund.occ3$p)), alpha=.25) +
  geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.upr, x=log10(abund.occ3$p)), alpha=.25)+
  geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.lwr, x=log10(abund.occ3$p)), alpha=.25)+
  labs(x="log10(Mean relative abundance)", y="Occupancy") + 
  theme_classic() + 
  scale_color_manual(values = c("#000000", "#E69F00", "#56B4E9")) +
  geom_text_repel(data = abund.occ3[abund.occ3$core == "Core" & abund.occ3$fit_class == "Below prediction",], 
                  aes(x = log10(p), y = freq, label = Label))
ggplotly(coreInteractive)